#build correlation dataframe for phase1 data
#Rows are accumulated in a preallocated list and bound once at the end:
#growing a data.frame with rbind() inside the loop is O(n^2) in copies.
cor.rows <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)){
  i <- sig.pos[k]
  #allele frequencies for this SNP across the 14 phase1 populations (cols 3:16)
  frq<-as.numeric(as.vector(phase1.all.freqs.lfmm.hum[phase1.all.freqs.lfmm.hum$id == i,3:16]))
  mod<-summary(lm(frq ~ sort.pops.phase1$hannual)) #perform linear reg
  n <- length(frq)                  #population count derived, not hard-coded
  dev <- sort(abs(frq - mean(frq))) #sorted absolute deviations from mean freq
  # store 9 values describing the correlation to feed into machine learning
  cor.rows[[k]] <- cbind(i,
                         max(abs(mod$residuals)),            #largest abs residual
                         median(mod$residuals),
                         mod$coefficients[1,1],              #intercept estimate
                         mod$sigma,                          #residual std. error
                         mod$r.squared,
                         mod$fstatistic[1],
                         cov(frq, sort.pops.phase1$hannual),
                         cor(frq, sort.pops.phase1$hannual),
                         dev[n]-dev[n-1]                     #gap between two most extreme pops
                         )
}
#bind all rows at once; preserve the original empty 10-col shape if no SNPs
cor.frame <- if (length(cor.rows) > 0) {
  data.frame(do.call(rbind, cor.rows))
} else {
  data.frame(matrix(, nrow=0, ncol=10))
}

#assign descriptive column names and drop the auto-generated rownames
colnames(cor.frame)<-c("id","maxresid","medresid","intercept","stdevresid","rsquared","f","cov","cor","outlier")
rownames(cor.frame) <- c()
#columns 2-10 came through rbind as factor/character; coerce them to numeric
#(as.character first so factor codes are not misread as values)
cor.frame[2:10] <- lapply(cor.frame[2:10],
                          function(col) as.numeric(as.character(col)))
#check column classes: id stays factor (pre-R4 stringsAsFactors), metrics numeric
class(cor.frame$id)
## [1] "factor"
class(cor.frame$medresid)
## [1] "numeric"
#calc correlation between these variables
#(id dropped with [,-1]; inspecting pairwise redundancy among the 9 metrics)
round(cor(cor.frame[,-1]), 2)
##            maxresid medresid intercept stdevresid rsquared     f   cov   cor
## maxresid       1.00    -0.05     -0.03       0.83    -0.69 -0.58  0.11  0.08
## medresid      -0.05     1.00      0.24       0.19     0.07  0.07 -0.02  0.04
## intercept     -0.03     0.24      1.00       0.15     0.01  0.09 -0.94 -0.88
## stdevresid     0.83     0.19      0.15       1.00    -0.57 -0.47  0.00 -0.03
## rsquared      -0.69     0.07      0.01      -0.57     1.00  0.94 -0.06 -0.04
## f             -0.58     0.07      0.09      -0.47     0.94  1.00 -0.13 -0.10
## cov            0.11    -0.02     -0.94       0.00    -0.06 -0.13  1.00  0.94
## cor            0.08     0.04     -0.88      -0.03    -0.04 -0.10  0.94  1.00
## outlier        0.60    -0.26     -0.33       0.15    -0.38 -0.35  0.28  0.30
##            outlier
## maxresid      0.60
## medresid     -0.26
## intercept    -0.33
## stdevresid    0.15
## rsquared     -0.38
## f            -0.35
## cov           0.28
## cor           0.30
## outlier       1.00
#going to drop f and cov because they are highly correlated
#(f with rsquared at 0.94, cov with intercept/cor; f=col 7, cov=col 8)
cor.frame<-cor.frame[,c(1:6,9:10)]

We are going to drop f and cov due to their high correlation with other variables.

#pull in the hand-identification training dataset
#(the 100 random SNPs were selected in the R script and saved to csv;
# column 1 is the csv's row index, so keep columns 2-9 only)
cor.training<-read.csv(file="~/Downloads/lfmm.hum.cor.training.csv")[,2:9]
#lay plots out in a 3x3 grid
par(mfrow=c(3,3))
#IDs of the 100 randomly selected training SNPs drive the plotting loop
sig.pos<-as.vector(cor.training$id)
for (snp in sig.pos){
  #phase1 allele frequencies for this SNP (14 populations, cols 3:16)
  af <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp,3:16]))
  plot(sort.pops.phase1$hannual, af, main = snp, ylim =c(0,1))
  abline(lm(af ~ sort.pops.phase1$hannual))
}

#hand classify this training set
#"environment"=0, "outlier"=1
vec<-c(0,1,0,0,0,1,1,0,1,
       0,0,1,1,1,1,0,0,0,
       0,1,1,1,0,1,1,0,1,
       1,1,1,0,1,1,0,1,0,
       1,0,1,0,0,0,0,0,1,
       1,1,1,1,1,0,1,1,1,
       1,1,1,0,0,1,0,1,1,
       0,0,0,0,0,1,0,1,1,
       0,0,0,1,0,1,1,0,0,
       1,0,0,0,0,1,1,1,1,
       0,1,0,1,0,0,0,0,1,
       0)
#map 0/1 codes to labels in one explicit step; the previous two-step in-place
#replacement silently relied on "1" == 1 character coercion after the first swap
vec <- ifelse(vec == 1, "outlier", "environment")

#add hand classification to the training set
cor.training$class<-as.factor(vec)
#write.csv(cor.training, file="~/Downloads/lfmm.hum.cor.training.csv")

#train the model (not including max residual which decreased prediction accuracy)
# NOTE(review): columns [,2:8] of the 9-column frame appear to still include
# maxresid (col 2) — confirm the predictor columns match the comment's intent
m <- naiveBayes(cor.training[,2:8], cor.training[,9])
#training-set predictions computed once and reused below
train.pred <- predict(m, cor.training[,2:8])
train.raw <- predict(m, cor.training[,2:8], type="raw")
#check predictions against training data
table(train.pred, cor.training[,9])
##              
##               environment outlier
##   environment          42       6
##   outlier               7      45
cor.training$pred.class<-train.pred #add prediction to df
cor.training$prob.env<-train.raw[,1] #add prediction probability to df
cor.training$prob.out<-train.raw[,2] #add prediction probability to df
#do prediction on the full SNP set; raw probabilities computed once
frame.raw <- predict(m, cor.frame[,2:8], type="raw")
cor.frame$class<-predict(m, cor.frame[,2:8]) #add prediction to df
cor.frame$prob.env<-frame.raw[,1] #add prediction probability to df
cor.frame$prob.out<-frame.raw[,2] #add prediction probability to df

#visualize prediction: PCA of the 7 metrics, colored by hand label vs ML label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(cor.training[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=cor.training$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=predict(m, cor.training[,2:8]), main = "ML predicted")

#histograms of individual variables by predicted class
#(semi-transparent blue = environment, pink = outlier, overlaid with add=TRUE)
hist(cor.frame$cor[cor.frame$class=="environment"], main="corr",
     col = rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue"), xlim=c(0,1), ylim=c(0,1750))
hist(cor.frame$cor[cor.frame$class=="outlier"],
     col = rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink"), add =TRUE)
hist(cor.frame$outlier[cor.frame$class=="environment"], main="outlier",
     col = rgb(173,216,230,max = 255, alpha = 80, names = "lt.blue"), xlim=c(0,1))
hist(cor.frame$outlier[cor.frame$class=="outlier"],
     col = rgb(255,192,203, max = 255, alpha = 80, names = "lt.pink"), add =TRUE)

#pca of all predicted points (full cor.frame, not just training SNPs)
par(mfrow=c(1,1))
pc.df<-as.data.frame(prcomp(cor.frame[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=cor.frame$class, main = "ML predicted")

#visualize specific training points
par(mfrow=c(3,3))
#plot each training SNP; the two original branches differed only in color,
#so collapse them into one plot call with the color chosen up front
sig.pos<-as.vector(cor.training$id)
for (j in seq_along(sig.pos)){
  i <- sig.pos[j]
  frq<-as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == i,3:16]))
  #black where the ML prediction agrees with the hand label, red where it disagrees
  pt.col <- if (cor.training$class[j] == cor.training$pred.class[j]) "black" else "red"
  plot(sort.pops.phase1$hannual,frq, main = i, ylim =c(0,1), col = pt.col,
       xlab=paste("prob env",round(cor.training$prob.env[j],2)),
       ylab=paste("prob out",round(cor.training$prob.out[j],2)))
  abline(lm(frq~sort.pops.phase1$hannual), col = pt.col)
}

#check the class balance of the full-set prediction
table(cor.frame$class)
## 
## environment     outlier 
##        5878        5103
par(mfrow=c(1,1))
hist(cor.frame$prob.env)

#keep only SNPs classified with >0.8 posterior probability, split by class
conf.out <- cor.frame$prob.out > .8
conf.env <- cor.frame$prob.env > .8
cor.frame.outliers <- cor.frame[conf.out, ]
cor.frame.env <- cor.frame[conf.env, ]
#start with the environmentally correlated SNPs
#ids of the confidently environment-classified phase1 SNPs
env.ids <- as.vector(cor.frame.env$id)
#restrict phase2 AF values to just these SNPs — much faster than repeatedly
#searching the entire 3M SNP dataset inside the loop below
keep <- phase2.all.freqs$id %in% env.ids
phase2.all.freqs.lfmm.hum <- phase2.all.freqs[keep, ]
#only SNPs actually called in phase2 drive the next loop
sig.pos <- phase2.all.freqs.lfmm.hum$id

#calc phase2 cor.frame
#accumulate rows in a preallocated list and bind once at the end
#(rbind() inside the loop is O(n^2) in copies)
env2.rows <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)){
  i <- sig.pos[k]
  #phase2 allele frequencies for this SNP (21 populations, cols 3:23)
  frq<-as.numeric(as.vector(phase2.all.freqs.lfmm.hum[phase2.all.freqs.lfmm.hum$id == i,3:23]))
  mod<-summary(lm(frq ~ sort.pops.phase2$hannual)) #perform linear reg
  n <- length(frq)                  #population count derived, not hard-coded
  dev <- sort(abs(frq - mean(frq))) #sorted absolute deviations from mean freq
  # store values describing the correlation to feed into machine learning
  env2.rows[[k]] <- cbind(i,
                          max(abs(mod$residuals)),
                          median(mod$residuals),
                          mod$coefficients[1,1],
                          mod$sigma,
                          mod$r.squared,
                          cor(frq, sort.pops.phase2$hannual),
                          dev[n]-dev[n-1]  #gap between two most extreme pops
                          )
}
#bind all rows at once; preserve the original empty 8-col shape if no SNPs
cor.frame.env.2 <- if (length(env2.rows) > 0) {
  data.frame(do.call(rbind, env2.rows))
} else {
  data.frame(matrix(, nrow=0, ncol=8))
}

#assign descriptive column names and drop the auto-generated rownames
colnames(cor.frame.env.2)<-c("id","maxresid","medresid","intercept","stdevresid","rsquared","cor","outlier")
rownames(cor.frame.env.2) <- c()
#metric columns arrive as factor/character; coerce to numeric
#(as.character first so factor codes are not misread as values)
cor.frame.env.2[2:8] <- lapply(cor.frame.env.2[2:8],
                               function(col) as.numeric(as.character(col)))

#merge phase1 and phase2 data, retaining only SNPs called in both phases
#(merge suffixes: .x = phase1, .y = phase2)
match.cor.frame<-merge(cor.frame.env, cor.frame.env.2, by='id')
#absolute phase1-vs-phase2 difference for one metric
absdiff <- function(a, b) abs(a - b)
diff.frame <- data.frame(
  id         = match.cor.frame$id,
  max.resid  = absdiff(match.cor.frame$maxresid.x,   match.cor.frame$maxresid.y),
  med.resid  = absdiff(match.cor.frame$medresid.x,   match.cor.frame$medresid.y),
  int        = absdiff(match.cor.frame$intercept.x,  match.cor.frame$intercept.y),
  stdevresid = absdiff(match.cor.frame$stdevresid.x, match.cor.frame$stdevresid.y),
  rsquared   = absdiff(match.cor.frame$rsquared.x,   match.cor.frame$rsquared.y),
  cor        = absdiff(match.cor.frame$cor.x,        match.cor.frame$cor.y),
  outlier    = absdiff(match.cor.frame$outlier.x,    match.cor.frame$outlier.y)
)
#pull in the hand-identification training dataset (already built and saved;
#column 1 is the csv's row index, so keep columns 2-9 only)
validate.env.training.set<-read.csv(file="~/Downloads/lfmm.hum.validate.env.csv")[,2:9]

par(mfrow=c(3,3))
#plot each validation SNP: phase1 in black with its regression line,
#phase2 overlaid in red with its own regression line
sig.pos<-as.vector(validate.env.training.set$id)
for (snp in sig.pos){
  p1 <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp,3:16]))
  p2 <- as.numeric(as.vector(phase2.all.freqs[phase2.all.freqs$id == snp,3:23]))
  plot(sort.pops.phase1$hannual, p1, main = snp, ylim =c(0,1))
  abline(lm(p1 ~ sort.pops.phase1$hannual))
  points(sort.pops.phase2$hannual, p2, col="red")
  abline(lm(p2 ~ sort.pops.phase2$hannual), col="red")
}

#"accept"=1,"reject"=0
vec<-c(1,1,1,1,1,1,1,0,1,
       0,0,0,1,0,1,1,0,0,
       0,0,1,0,1,0,0,1,1,
       0,1,0,0,0,0,0,0,0,
       0,1,0,1,0,1,1,1,1,
       0,1,0,1,0,1,0,1,1,
       1,1,1,1,1,1,1,1,1,
       0,0,1,1,0,0,1,0,0,
       0,1,1,1,0,1,0,0,0,
       0,0,1,1,1,0,0,1,0,
       1,0,1,1,1,1,1,0,1,
       0)
#map 0/1 codes to labels in one explicit step; the previous two-step in-place
#replacement silently relied on "1" == 1 character coercion after the first swap
vec <- ifelse(vec == 1, "accept", "reject")

#add hand classification to the training set
validate.env.training.set$class<-as.factor(vec)
#write.csv(validate.env.training.set, file="~/Downloads/lfmm.hum.validate.env.csv")

#train the model (not including max residual which decreased prediction accuracy)
# NOTE(review): columns [,2:8] of the 9-column frame appear to still include
# max.resid (col 2) — confirm the predictor columns match the comment's intent
m <- naiveBayes(validate.env.training.set[,2:8], validate.env.training.set[,9])
#training-set predictions computed once and reused below
pred <- predict(m, validate.env.training.set[,2:8])
#check predictions against training data
table(pred, validate.env.training.set[,9])
##         
##          accept reject
##   accept     46     12
##   reject      8     34
validate.env.training.set$pred.class<-pred #add prediction to df
validate.env.training.set$prob.accept<-predict(m, validate.env.training.set[,2:8], type="raw")[,1] #add prediction probability to df

#do prediction on the full diff.frame
diff.frame$class<-predict(m, diff.frame[,2:8]) #add prediction to df
diff.frame$prob.accept<-predict(m, diff.frame[,2:8], type="raw")[,1] #add prediction probability to df
#visualize prediction: PCA of the 7 difference metrics, hand label vs ML label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(validate.env.training.set[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=validate.env.training.set$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=pred, main = "ML predicted")

#check the class balance of the full-set prediction
table(diff.frame$class)
## 
## accept reject 
##   2437   2139
par(mfrow=c(1,1))
hist(diff.frame$prob.accept)

#keep only SNPs accepted with >0.8 posterior probability
keep <- diff.frame$prob.accept > .8
vetted.lfmm.hum.env <- diff.frame[keep, ]
#add order column from allsigs to visualize this on a manhattan plot
#(ids in all.sigs are "chrom pos" strings, rebuilt here with paste)
sig.ids <- paste(all.sigs$chrom, all.sigs$pos)
all.sigs.env <- all.sigs[sig.ids %in% vetted.lfmm.hum.env$id, ]
all.sigs.env <- data.frame(order = all.sigs.env$order,
                           id = paste(all.sigs.env$chrom, all.sigs.env$pos))
vetted.lfmm.hum.env <- merge(all.sigs.env, vetted.lfmm.hum.env, by = "id")
#write.csv(vetted.lfmm.hum.env[,1:2], file = "~/Downloads/vetted.lfmm.hum.env.csv")
#####
####
###
##
#now outliers
#ids of the confidently outlier-classified phase1 SNPs
out.ids <- as.vector(cor.frame.outliers$id)
#restrict phase2 AF values to just these SNPs — much faster than repeatedly
#searching the entire 3M SNP dataset inside the loop below
keep <- phase2.all.freqs$id %in% out.ids
phase2.all.freqs.lfmm.hum <- phase2.all.freqs[keep, ]
#only SNPs actually called in phase2 drive the next loop
sig.pos <- phase2.all.freqs.lfmm.hum$id

#calc phase2 cor.frame
#accumulate rows in a preallocated list and bind once at the end
#(rbind() inside the loop is O(n^2) in copies)
out2.rows <- vector("list", length(sig.pos))
for (k in seq_along(sig.pos)){
  i <- sig.pos[k]
  #phase2 allele frequencies for this SNP (21 populations, cols 3:23)
  frq<-as.numeric(as.vector(phase2.all.freqs.lfmm.hum[phase2.all.freqs.lfmm.hum$id == i,3:23]))
  mod<-summary(lm(frq ~ sort.pops.phase2$hannual)) #perform linear reg
  n <- length(frq)                  #population count derived, not hard-coded
  dev <- sort(abs(frq - mean(frq))) #sorted absolute deviations from mean freq
  # store values describing the correlation to feed into machine learning
  out2.rows[[k]] <- cbind(i,
                          max(abs(mod$residuals)),
                          median(mod$residuals),
                          mod$coefficients[1,1],
                          mod$sigma,
                          mod$r.squared,
                          cor(frq, sort.pops.phase2$hannual),
                          dev[n]-dev[n-1]  #gap between two most extreme pops
                          )
}
#bind all rows at once; preserve the original empty 8-col shape if no SNPs
cor.frame.outliers.2 <- if (length(out2.rows) > 0) {
  data.frame(do.call(rbind, out2.rows))
} else {
  data.frame(matrix(, nrow=0, ncol=8))
}

#assign descriptive column names and drop the auto-generated rownames
colnames(cor.frame.outliers.2)<-c("id","maxresid","medresid","intercept","stdevresid","rsquared","cor","outlier")
rownames(cor.frame.outliers.2) <- c()
#metric columns arrive as factor/character; coerce to numeric
#(as.character first so factor codes are not misread as values)
cor.frame.outliers.2[2:8] <- lapply(cor.frame.outliers.2[2:8],
                                    function(col) as.numeric(as.character(col)))

#merge phase1 and phase2 data, retaining only SNPs called in both phases
#(merge suffixes: .x = phase1, .y = phase2)
match.cor.frame<-merge(cor.frame.outliers, cor.frame.outliers.2, by='id')
#absolute phase1-vs-phase2 difference for one metric
absdiff <- function(a, b) abs(a - b)
diff.frame <- data.frame(
  id         = match.cor.frame$id,
  max.resid  = absdiff(match.cor.frame$maxresid.x,   match.cor.frame$maxresid.y),
  med.resid  = absdiff(match.cor.frame$medresid.x,   match.cor.frame$medresid.y),
  int        = absdiff(match.cor.frame$intercept.x,  match.cor.frame$intercept.y),
  stdevresid = absdiff(match.cor.frame$stdevresid.x, match.cor.frame$stdevresid.y),
  rsquared   = absdiff(match.cor.frame$rsquared.x,   match.cor.frame$rsquared.y),
  cor        = absdiff(match.cor.frame$cor.x,        match.cor.frame$cor.y),
  outlier    = absdiff(match.cor.frame$outlier.x,    match.cor.frame$outlier.y)
)
#pull in the hand-identification training dataset (the same SNPs randomly
#selected in the R script; column 1 is the csv's row index, keep columns 2-9)
validate.outliers.training.set<-read.csv(file="~/Downloads/lfmm.hum.validate.outliers.csv")
validate.outliers.training.set<-validate.outliers.training.set[,2:9]

par(mfrow=c(3,3))
#plot each validation SNP: phase1 in black with its regression line,
#phase2 overlaid in red with its own regression line
sig.pos<-as.vector(validate.outliers.training.set$id)
for (snp in sig.pos){
  p1 <- as.numeric(as.vector(phase1.all.freqs[phase1.all.freqs$id == snp,3:16]))
  p2 <- as.numeric(as.vector(phase2.all.freqs[phase2.all.freqs$id == snp,3:23]))
  plot(sort.pops.phase1$hannual, p1, main = snp, ylim =c(0,1))
  abline(lm(p1 ~ sort.pops.phase1$hannual))
  points(sort.pops.phase2$hannual, p2, col="red")
  abline(lm(p2 ~ sort.pops.phase2$hannual), col="red")
}

#"accept"=1,"reject"=0
vec<-c(0,0,0,0,1,1,1,0,0,
       1,0,1,1,0,0,1,1,1,
       1,0,0,1,1,0,0,0,1,
       1,1,1,0,0,0,1,1,1,
       0,0,0,1,1,1,1,0,0,
       1,0,0,0,0,1,0,0,0,
       0,1,1,0,1,1,0,0,0,
       1,0,0,1,0,0,0,1,0,
       1,1,1,0,0,1,1,0,1,
       1,0,0,1,1,0,0,1,1,
       1,0,0,0,0,0,1,1,0,
       1)
#map 0/1 codes to labels in one explicit step; the previous two-step in-place
#replacement silently relied on "1" == 1 character coercion after the first swap
vec <- ifelse(vec == 1, "accept", "reject")

#add hand classification to the training set
validate.outliers.training.set$class<-as.factor(vec)
#write.csv(validate.outliers.training.set, file="~/Downloads/lfmm.hum.validate.outliers.csv")

#train the model (not including max residual which decreased prediction accuracy)
# NOTE(review): columns [,2:8] of the 9-column frame appear to still include
# max.resid (col 2) — confirm the predictor columns match the comment's intent
m <- naiveBayes(validate.outliers.training.set[,2:8], validate.outliers.training.set[,9])
#training-set predictions computed once and reused below
pred <- predict(m, validate.outliers.training.set[,2:8])
#check predictions against training data
table(pred, validate.outliers.training.set[,9])
##         
##          accept reject
##   accept     44     13
##   reject      3     40
validate.outliers.training.set$pred.class<-pred #add prediction to df
validate.outliers.training.set$prob.accept<-predict(m, validate.outliers.training.set[,2:8], type="raw")[,1] #add prediction probability to df
#do prediction on the full diff.frame
diff.frame$class<-predict(m, diff.frame[,2:8]) #add prediction to df
diff.frame$prob.accept<-predict(m, diff.frame[,2:8], type="raw")[,1] #add prediction probability to df

#visualize prediction: PCA of the 7 difference metrics, hand label vs ML label
par(mfrow=c(1,2))
pc.df<-as.data.frame(prcomp(validate.outliers.training.set[,2:8])$x)
plot(pc.df$PC1,pc.df$PC2, col=validate.outliers.training.set$class, main = "hand classified")
plot(pc.df$PC1,pc.df$PC2, col=pred, main = "ML predicted")

#check the class balance of the full-set prediction
table(diff.frame$class)
## 
## accept reject 
##   1796   1690
par(mfrow=c(1,1))
hist(diff.frame$prob.accept)

#keep only SNPs accepted with >0.8 posterior probability
keep <- diff.frame$prob.accept > .8
vetted.lfmm.hum.outliers <- diff.frame[keep, ]
#add order column from allsigs to visualize this on a manhattan plot
#(ids in all.sigs are "chrom pos" strings, rebuilt here with paste)
sig.ids <- paste(all.sigs$chrom, all.sigs$pos)
all.sigs.out <- all.sigs[sig.ids %in% vetted.lfmm.hum.outliers$id, ]
all.sigs.out <- data.frame(order = all.sigs.out$order,
                           id = paste(all.sigs.out$chrom, all.sigs.out$pos))
vetted.lfmm.hum.outliers <- merge(all.sigs.out, vetted.lfmm.hum.outliers, by = "id")

#pull allele freqs for outliers
phase1.all.freqs.vetted.outliers<- phase1.all.freqs[phase1.all.freqs$id %in% vetted.lfmm.hum.outliers$id,]

#identify the outlier pop: the population column whose frequency is flagged by
#outlier(..., logical = TRUE) (presumably the outliers package — confirm)
#column names hoisted out of the loop; vapply enforces exactly one pop per SNP,
#as the original per-element assignment implicitly did
pop.names <- colnames(phase1.all.freqs.vetted.outliers)[3:16]
outlier.pop <- vapply(seq_len(nrow(phase1.all.freqs.vetted.outliers)), function(r) {
  pop.names[outlier(as.numeric(phase1.all.freqs.vetted.outliers[r, 3:16]), logical = TRUE)]
}, character(1))
#add to df
vetted.lfmm.hum.outliers$outlier.pop<-outlier.pop
#dataset sizes at each filtering stage
# NOTE(review): lfmm.hum.outliers is not defined in this chunk — presumably the
# full pre-vetting outlier set created earlier in the script; verify
nrow(lfmm.hum.outliers)
## [1] 10981
nrow(cor.frame.outliers)
## [1] 3920
nrow(cor.frame.env)
## [1] 4989
nrow(vetted.lfmm.hum.env)
## [1] 2088
#how many vetted single-pop outlier SNPs each population accounts for
table(vetted.lfmm.hum.outliers$outlier.pop)
## 
## X.3.635 X.3.862 X.8.821  X0.384   X0.77 X11.233 X11.891 
##      14       4     661     712      17       2      95
#write.csv(vetted.lfmm.hum.outliers[,c(1:2,12)], file = "~/Downloads/vetted.lfmm.hum.outliers.csv")

Population key: 3.635 = Kenya Mbogolo (KE); 3.862 = Kenya Junju (KE); 8.821 = Angola Luanda (AO); 0.384 = Gabon Libreville (GA); 0.77 = Uganda Tororo (UG); 11.233 = Burkina Faso Bana (BF); 11.891 = Guinea-Bissau Antula (GW).

#bring in selective sweep data (h12 stats) for select populations
# NOTE(review): BFM is loaded but not plotted below — confirm whether it is
# used later or can be dropped
KE<-read.csv(file="~/Downloads/KE.hstats.csv")
AO<-read.csv(file="~/Downloads/AO.hstats.csv")
GA<-read.csv(file="~/Downloads/GA.hstats.csv")
UG<-read.csv(file="~/Downloads/UG.hstats.csv")
BFM<-read.csv(file="~/Downloads/BFM.hstats.csv")
BFS<-read.csv(file="~/Downloads/BFS.hstats.csv")
GW<-read.csv(file="~/Downloads/GW.hstats.csv")
#bring in all significant outliers
all.sigs<-read.csv(file="~/Desktop/anoph.phase2/all.sigs.fdr.adj.csv")
#bring in vetted outlier datasets (written out by the vetting steps above)
lfmm.hum.single.pop.outliers<-read.csv(file = "~/Downloads/vetted.lfmm.hum.outliers.csv")
lfmm.hum.env.assoc.outliers<-read.csv(file = "~/Downloads/vetted.lfmm.hum.env.csv")
#build a figure that is:
#line1: manhattan plot lfmm hum pvals, with red lines indicating vetted env. assoc. SNPs
#line2-5: genome plots of selective sweep metric for a single pop, with red lines indicating local outliers

#LFMM hum plot -log10(signficance vals) as manhattan plots
#(vlines drawn last so the vetted env-associated SNP positions sit on top)
env<-ggplot(data=all.sigs, aes(x=order, y=-log10(hum.p), col = chrom))+
  geom_scattermore()+
  scale_color_manual(values=c("black","gray","black","gray","gray"))+
  theme(panel.grid=element_blank(), panel.background=element_blank())+
  geom_vline(xintercept=c(lfmm.hum.env.assoc.outliers$order),color='gray', alpha=.35)

#one h12 genome plot per population: the sixfold copy-paste is collapsed into
#a helper so the styling stays consistent across panels
#hstats:    one population's sweep-statistic table (order, h12 columns)
#pop.codes: outlier.pop code(s) whose SNP positions get gray vertical lines
h12.plot <- function(hstats, pop.codes) {
  hits <- lfmm.hum.single.pop.outliers$order[
    lfmm.hum.single.pop.outliers$outlier.pop %in% pop.codes]
  ggplot(data = hstats, aes(x = order, y = h12)) +
    ylim(c(0, 1)) +
    theme(panel.grid = element_blank(), panel.background = element_blank()) +
    geom_vline(xintercept = c(hits), color = 'gray', alpha = .35) +
    geom_line()
}

GA.plot  <- h12.plot(GA,  "X0.384")
AO.plot  <- h12.plot(AO,  "X8.821")
GW.plot  <- h12.plot(GW,  "X11.891")
UG.plot  <- h12.plot(UG,  "X0.77")
#KE hosts two outlier populations (Mbogolo and Junju)
KE.plot  <- h12.plot(KE,  c("X3.635", "X3.862"))
BFS.plot <- h12.plot(BFS, "X11.233")

#stack the manhattan plot over the six per-population sweep panels
grid.arrange(env,GA.plot,AO.plot,GW.plot,UG.plot,KE.plot,BFS.plot, nrow = 7)